##########################################################################
## Replication syntax for Malet & Hegewald (2025, JEPP):
## "The Changing Geography of Support for European Integration in the Shadow of the Ukraine War"
## Results appendix
##########################################################################

# R version: 4.5.1 
# Platform: x86_64-apple-darwin20

# here 1.0.1
# tidyverse 2.0.0
# eurostat 4.0.0
# fixest 0.12.1
# broom 1.0.9
# ggpubr 0.6.1
# kableExtra 1.4.0

# 1. Load packages #######################################################
library(here)
library(tidyverse)
library(eurostat)
library(fixest)
library(broom)
library(ggpubr)
library(kableExtra)

# 2. Basic setup #########################################################
rm(list=ls())
i_am("03_results_appendix.R")

# 3. Import data ##########################################################
data <- read.csv("survey_data.csv")

# 4. Create event study data ##############################################
data_event <- data %>%
  mutate(date = ymd(date, quiet = TRUE))

median_dates <- data_event %>%
  group_by(studyno1) %>%
  summarize(
    median_date = median(date, na.rm = TRUE),
    start_date  = min(date, na.rm = TRUE),
    .groups = "drop"
  )

data_event_fin <- left_join(data_event, median_dates, by = "studyno1") 

data_event_fin <- data_event_fin %>%
  filter(!median_date %in% as.Date(c("2020-11-07", "2021-09-28"))) 
names(data_event_fin)

# 5. Table A1 & A2 #######################################################
data_summary <- data_event_fin %>% 
  mutate(across(where(is.character), as.factor))

data_summary <- data_summary %>% select(
  Proposals_CommonDefence, Proposals_CommonForeign,
  Proposals_EnlargeEU, Proposals_CommonEnergy, 
  closeness_to_russia, exposure,
  gender, age, ageEduc, locality, profession, leftRight,
  polar_eu_position, weighted_avg_eu_position
)

numeric_vars <- data_summary %>% select(where(is.numeric))

numeric_summary <- numeric_vars %>%
  summarise(across(everything(),
                   list(
                     Mean = ~round(mean(., na.rm = TRUE), 1),
                     SD   = ~round(sd(., na.rm = TRUE), 1),
                     Min  = ~round(min(., na.rm = TRUE), 1),
                     Max  = ~round(max(., na.rm = TRUE), 1)
                   ),
                   .names = "{.col}_{.fn}")) %>%
  pivot_longer(
    everything(),
    names_to = c("Variable", ".value"),
    names_pattern = "^(.*)_(Mean|SD|Min|Max)$"
  )

numeric_table <- kable(
  numeric_summary,
  format = "latex", 
  booktabs = TRUE,
  caption = "Descriptive statistics for numeric variables.",
  align = c("l", "r", "r", "r", "r")
) %>%
  kable_styling(latex_options = "hold_position")

print(numeric_table)

factor_vars <- data_summary %>% select(where(is.factor))

factor_summary <- factor_vars %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Category") %>%
  filter(!is.na(Category)) %>%
  group_by(Variable, Category) %>%
  summarise(N = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(Percent = round(N / sum(N) * 100, 1)) %>%
  ungroup()

factor_table <- kable(
  factor_summary,
  format = "latex",
  booktabs = TRUE,
  col.names = c("Variable", "Category", "N", "%"),
  caption = "Category distribution for factor variables.",
  align = c("l", "l", "r", "r")
) %>%
  kable_styling(latex_options = "hold_position")

print(factor_table)

# 6. Figure A1 ###########################################################
hist <- data_event_fin %>% select(uniqid, studyno1, NUTScode)
hist$wave <- factor(data_event_fin$studyno1,
                    levels = c(7649, 7780, 7783, 7848, 7888, 7902, 7953, 7997),
                    labels = c(
                      "Eurobarometer 93.1",
                      "Eurobarometer 94.3",
                      "Eurobarometer 95.3",
                      "Eurobarometer 96.3",
                      "Eurobarometer 97.3",
                      "Eurobarometer 97.5",
                      "Eurobarometer 98.2",
                      "Eurobarometer 99.4"
                    )
)

hist_summary <- hist %>%
  filter(!is.na(wave)) %>%             
  group_by(wave, NUTScode) %>%
  summarise(n_uniqids = n_distinct(uniqid), .groups = "drop")

means <- hist_summary %>%
  group_by(wave) %>%
  summarise(mean_n = mean(n_uniqids), .groups = "drop")

histo <- ggplot(hist_summary, aes(x = n_uniqids)) +
  geom_histogram(binwidth = 5, color = "black", fill = "skyblue") +
  geom_vline(data = means, aes(xintercept = mean_n), linetype = "dashed", color = "red") +
  facet_wrap(~ wave) +
  labs(
    x = "Number of unique respondents",
    y = "Number of NUTS regions"
  ) +
  theme_minimal()

png(file = here("FigureA1.png"), width = 7, height = 7, units = "in", res = 800)
histo
dev.off()

# 7. Figure A2 ###########################################################
data_map <- data_event_fin %>% select(NUTScode, closeness_to_russia)

nuts1 <- get_eurostat_geospatial(nuts_level = 1, year=2021)
nuts2 <- get_eurostat_geospatial(nuts_level = 2, year=2021)
nuts3 <- get_eurostat_geospatial(nuts_level = 3, year=2021)
nuts_shapes <- bind_rows(nuts1, nuts2, nuts3)

nuts_shapes <- nuts_shapes %>% rename(GEO = NUTS_ID)

nuts_shapes <- nuts_shapes %>% rename(NUTScode = id) %>% 
  select(NUTScode, geometry)

data_map <- data_map %>% left_join(nuts_shapes, by = "NUTScode")
data_map <- unique(data_map)

FigA2 <- ggplot(data_map) +
  geom_sf(aes(geometry = geometry, fill = closeness_to_russia)) +
  scale_fill_viridis_c(
    option = "viridis", 
    name = "Logged closeness to Russia (rescaled 0-1)",
    breaks = c(0, 0.5, 1)  
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")

png(file = here("FigureA2.png"), width = 12, height = 8, units = "in", res = 800)
FigA2
dev.off()

# 8. Figure A3 ###########################################################
data_sub <- data_event_fin %>%
  mutate(
    median_closeness = median(closeness_to_russia, na.rm = TRUE),
    closeness_group = if_else(closeness_to_russia <= median_closeness, "far", "close")
  )

ds <- data_sub %>%
  group_by(median_date, closeness_group) %>%
  summarise(
    Defence_mean = mean(Proposals_CommonDefence, na.rm = TRUE) * 100,
    Defence_se = sd(Proposals_CommonDefence, na.rm = TRUE) / sqrt(sum(!is.na(Proposals_CommonDefence))) * 100,
    Foreign_mean = mean(Proposals_CommonForeign, na.rm = TRUE) * 100,
    Foreign_se = sd(Proposals_CommonForeign, na.rm = TRUE) / sqrt(sum(!is.na(Proposals_CommonForeign))) * 100,
    Enlargement_mean = mean(Proposals_EnlargeEU, na.rm = TRUE) * 100,
    Enlargement_se = sd(Proposals_EnlargeEU, na.rm = TRUE) / sqrt(sum(!is.na(Proposals_EnlargeEU))) * 100,
    Energy_mean = mean(Proposals_CommonEnergy, na.rm = TRUE) * 100,
    Energy_se = sd(Proposals_CommonEnergy, na.rm = TRUE) / sqrt(sum(!is.na(Proposals_CommonEnergy))) * 100,
    .groups = "drop"
  ) %>%
  mutate(
    Defence_lower = Defence_mean - 1.96 * Defence_se,
    Defence_upper = Defence_mean + 1.96 * Defence_se,
    Foreign_lower = Foreign_mean - 1.96 * Foreign_se,
    Foreign_upper = Foreign_mean + 1.96 * Foreign_se,
    Enlargement_lower = Enlargement_mean - 1.96 * Enlargement_se,
    Enlargement_upper = Enlargement_mean + 1.96 * Enlargement_se,
    Energy_lower = Energy_mean - 1.96 * Energy_se,
    Energy_upper = Energy_mean + 1.96 * Energy_se
  )

data_long <- ds %>%
  select(median_date, closeness_group,
         Defence_mean, Defence_lower, Defence_upper,
         Foreign_mean, Foreign_lower, Foreign_upper,
         Enlargement_mean, Enlargement_lower, Enlargement_upper,
         Energy_mean, Energy_lower, Energy_upper) %>%
  pivot_longer(
    cols = -c(median_date, closeness_group),
    names_to = c("policy_area", ".value"),
    names_pattern = "(.*)_(mean|lower|upper)"
  ) %>%
  rename(support = mean) %>%
  mutate(policy_area = factor(policy_area, levels = c("Defence", "Foreign", "Enlargement", "Energy")))

FigA3 <- ggplot(data_long, aes(x = median_date, y = support, color = closeness_group, group = closeness_group)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = closeness_group), alpha = 0.2, color = NA) +
  geom_line(size = 1, na.rm = TRUE) +
  geom_point(size = 2, na.rm = TRUE) +
  facet_wrap(~policy_area, scales = "free_y") +
  labs(
    x = "",
    y = "Support (in %)",
    color = "Closeness to Russia",
    fill = "Closeness to Russia"
  ) +
  theme_minimal(base_size = 17) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 16),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "6 months") +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  geom_vline(xintercept = as.numeric(as.Date("2022-02-24")), linetype = "dashed", color = "red", size = 1)

ggsave(file=here("FigureA3.png"), width = 14, height = 10, bg = "white")
FigA3 
dev.off()

# 9. Figure A4 ###########################################################
event_plot <- function(model, title_label, y_label) {
  tidy(model, conf.int = TRUE) %>%
    filter(grepl("closeness_to_russia", term)) %>%
    mutate(date = as.Date(gsub(".*::", "", term))) %>%
    bind_rows(
      tibble(
        term = "baseline",
        estimate = 0,
        std.error = NA,
        statistic = NA,
        p.value = NA,
        conf.low = 0,
        conf.high = 0,
        date = as.Date("2022-01-29")
      )
    ) %>%
    ggplot(aes(x = date, y = estimate, ymin = conf.low, ymax = conf.high)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    geom_vline(xintercept = as.Date("2022-01-29"), linetype = "dashed", color = "red", size = 1) +
    geom_pointrange(color = "black") +
    labs(
      x = "",
      y = y_label,
      title = title_label
    ) +
    scale_x_date(
      date_labels = "%b %Y",
      date_breaks = "6 months"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(angle = 90, hjust = 1, color = "black"),
      axis.text.y = element_text(color = "black"),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
      panel.grid.major = element_line(color = "gray90"),
      panel.grid.minor = element_blank()
    )
}

mod1 <- feols(Proposals_CommonTrade ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod2 <- feols(Proposals_CommonMigration ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod3 <- feols(Proposals_SingleCurrency ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod4 <- feols(Proposals_DigitalMarket ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod5 <- feols(Proposals_freeMove ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod6 <- feols(Proposals_CommonAsylum ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod7 <- feols(Proposals_ReinforceBorders ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod8 <- feols(Proposals_FundSME ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod9 <- feols(Proposals_GenderEqual ~  
                i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                i(median_date, exposure, ref = as.Date("2022-01-29")) +
                gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                polar_eu_position:factor(median_date) +
                weighted_avg_eu_position:factor(median_date) |
                NUTScode + factor(median_date),
              cluster = ~NUTScode, data = data_event_fin)

mod10 <- feols(Proposals_TradeAgreement ~  
                 i(median_date, closeness_to_russia, ref = as.Date("2022-01-29")) +
                 i(median_date, exposure, ref = as.Date("2022-01-29")) +
                 gender + age + ageEduc + locality + profession + leftRight + I(leftRight^2)+ 
                 polar_eu_position:factor(median_date) +
                 weighted_avg_eu_position:factor(median_date) |
                 NUTScode + factor(median_date),
               cluster = ~NUTScode, data = data_event_fin)

etable(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10,
       tex = TRUE,
       dict = c(
         "Proposals_CommonTrade" = "Common Trade Policy",
         "Proposals_CommonMigration" = "Common Migration Policy",
         "Proposals_SingleCurrency" = "Single Currency",
         "Proposals_DigitalMarket" = "Digital Market",
         "Proposals_freeMove" = "Free Movement",
         "Proposals_CommonAsylum" = "Common Asylum Policy",
         "Proposals_ReinforceBorders" = "Reinforce Borders",
         "Proposals_FundSME" = "Fund SMEs",
         "Proposals_GenderEqual" = "Gender Equality",
         "Proposals_TradeAgreement" = "Trade Agreements"
       ))

p1 <- event_plot(mod1, "Trade", "Effect on Support \nfor Common Trade Policy")
p2 <- event_plot(mod2, "Migration", "Effect on Support \nfor Common Migration Policy")
p3 <- event_plot(mod3, "Single Currency", "Effect on Support \nfor Single Currency")
p4 <- event_plot(mod4, "Digital Market", "Effect on Support \nfor Digital Market")
p5 <- event_plot(mod5, "Free Movement", "Effect on Support \nfor Free Movement")
p6 <- event_plot(mod6, "Asylum", "Effect on Support \nfor Common Asylum Policy")
p7 <- event_plot(mod7, "Reinforce Borders", "Effect on Support \nfor Reinforce Borders")
p8 <- event_plot(mod8, "Fund SME", "Effect on Support \nfor Fund SME")
p9 <- event_plot(mod9, "Gender Equality", "Effect on Support \nfor Gender Equality")
p10 <- event_plot(mod10, "Trade Agreement", "Effect on Support \nfor Trade Agreement")

combined_plots <- ggarrange(
  p1, p2, p3, p4, p5,
  p6, p7, p8, p9, p10,
  ncol = 2, nrow = 5,
  labels = LETTERS[1:10],
  align = "hv"
)

png(file = here("FigureA4.png"), width = 7, height = 16, units = "in", res = 800)
combined_plots
dev.off()

# 10. Figure A5 ###########################################################
event_plot <- function(model, title_label, y_label) {
  tidy(model, conf.int = TRUE) %>%
    filter(grepl("exposure", term)) %>%
    mutate(date = as.Date(gsub(".*::", "", term))) %>%
    bind_rows(
      tibble(
        term = "baseline",
        estimate = 0,
        std.error = NA,
        statistic = NA,
        p.value = NA,
        conf.low = 0,
        conf.high = 0,
        date = as.Date("2022-01-29")
      )
    ) %>%
    ggplot(aes(x = date, y = estimate, ymin = conf.low, ymax = conf.high)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    geom_vline(xintercept = as.Date("2022-01-29"), linetype = "dashed", color = "red", size = 1) +
    geom_pointrange(color = "black") +
    labs(
      x = "",
      y = y_label,
      title = title_label
    ) +
    scale_x_date(
      date_labels = "%b %Y",
      date_breaks = "6 months"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(angle = 90, hjust = 1, color = "black"),
      axis.text.y = element_text(color = "black"),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
      panel.grid.major = element_line(color = "gray90"),
      panel.grid.minor = element_blank()
    )
}

p1 <- event_plot(mod1, "Trade", "Effect on Support \nfor Common Trade Policy")
p2 <- event_plot(mod2, "Migration", "Effect on Support \nfor Common Migration Policy")
p3 <- event_plot(mod3, "Single Currency", "Effect on Support \nfor Single Currency")
p4 <- event_plot(mod4, "Digital Market", "Effect on Support \nfor Digital Market")
p5 <- event_plot(mod5, "Free Movement", "Effect on Support \nfor Free Movement")
p6 <- event_plot(mod6, "Asylum", "Effect on Support \nfor Common Asylum Policy")
p7 <- event_plot(mod7, "Reinforce Borders", "Effect on Support \nfor Reinforce Borders")
p8 <- event_plot(mod8, "Fund SME", "Effect on Support \nfor Fund SME")
p9 <- event_plot(mod9, "Gender Equality", "Effect on Support \nfor Gender Equality")
p10 <- event_plot(mod10, "Trade Agreement", "Effect on Support \nfor Trade Agreement")

combined_plots <- ggarrange(
  p1, p2, p3, p4, p5,
  p6, p7, p8, p9, p10,
  ncol = 2, nrow = 5,
  labels = LETTERS[1:10],
  align = "hv"
)

png(file = here("FigureA5.png"), width = 7, height = 16, units = "in", res = 800)
combined_plots
dev.off()

# 11. Figure A6 ###########################################################
data_did <- data_event_fin %>%
  mutate(
    closeness_tercile = ntile(closeness_to_russia, 3),
    post = ifelse(median_date > as.Date("2022-01-29"), 1, 0)
  ) %>%
  filter(median_date >= as.Date("2022-01-29"))

data_did$closeness_tercile <- factor(data_did$closeness_tercile)

mod1_did <- feols(
  Proposals_CommonDefence ~  
    i(post, closeness_tercile, ref = 0, ref2 = 1) +   
    i(post, exposure, ref = 0) +
    gender + age + ageEduc + locality + profession + 
    leftRight + I(leftRight^2) +
    polar_eu_position:factor(median_date) +
    weighted_avg_eu_position:factor(median_date) |
    NUTScode + factor(median_date),
  cluster = ~NUTScode, data = data_did
)

mod2_did <- feols(
  Proposals_CommonForeign ~  
    i(post, closeness_tercile, ref = 0, ref2 = 1) +
    i(post, exposure, ref = 0) +
    gender + age + ageEduc + locality + profession +
    leftRight + I(leftRight^2) +
    polar_eu_position:factor(median_date) +
    weighted_avg_eu_position:factor(median_date) |
    NUTScode + factor(median_date),
  cluster = ~NUTScode, data = data_did
)

mod3_did <- feols(
  Proposals_EnlargeEU ~  
    i(post, closeness_tercile, ref = 0, ref2 = 1) +
    i(post, exposure, ref = 0) +
    gender + age + ageEduc + locality + profession +
    leftRight + I(leftRight^2) +
    polar_eu_position:factor(median_date) +
    weighted_avg_eu_position:factor(median_date) |
    NUTScode + factor(median_date),
  cluster = ~NUTScode, data = data_did
)

mod4_did <- feols(
  Proposals_CommonEnergy ~  
    i(post, closeness_tercile, ref = 0, ref2 = 1) +
    i(post, exposure, ref = 0) +
    gender + age + ageEduc + locality + profession +
    leftRight + I(leftRight^2) +
    polar_eu_position:factor(median_date) +
    weighted_avg_eu_position:factor(median_date) |
    NUTScode + factor(median_date),
  cluster = ~NUTScode, data = data_did
)

get_two <- function(m, outcome){
  broom::tidy(m, conf.int = TRUE) %>%
    filter(term %in% c("post::1:closeness_tercile::2",
                       "post::1:closeness_tercile::3")) %>%
    mutate(term = recode(term,
                         "post::1:closeness_tercile::2" = "Post × Medium vs Low",
                         "post::1:closeness_tercile::3" = "Post × High vs Low"
    ),
    outcome = outcome)
}

df_plot <- bind_rows(
  get_two(mod1_did, "Common Defence"),
  get_two(mod2_did, "Common Foreign"),
  get_two(mod3_did, "Enlarge EU"),
  get_two(mod4_did, "Common Energy")
)

xlim_all <- range(c(df_plot$conf.low, df_plot$conf.high), na.rm = TRUE)

combined_plots <- ggplot(df_plot, aes(x = estimate, y = term, xmin = conf.low, xmax = conf.high)) +
  geom_point() +
  geom_errorbarh(height = 0.2) +
  facet_wrap(~ outcome, scales = "fixed") +   
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    title = "",
    x = "DiD effects by closeness to Russia \n(Low tercile as baseline)", y = NULL
  ) +
  coord_cartesian(xlim = xlim_all) +
  theme_minimal()

png(file = here("FigureA6.png"), width = 6, height = 3, units = "in", res = 800)
combined_plots
dev.off()

